PC World Komputer 2010 April

home *** CD-ROM | disk | FTP | other *** search

/ PC World Komputer 2010 April / PCWorld0410.iso / hity wydania / Ubuntu 9.10 PL / karmelkowy-koliberek-desktop-9.10-i386-PL.iso / casper / filesystem.squashfs / usr / lib / python2.6 / urlparse.pyc (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2009-11-11 | 14KB | 500 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# The decompiler output was collapsed onto a handful of lines and several
# control-flow constructs were corrupted; the module below is the repaired,
# conventionally formatted form.  It is written to run on Python 2.6 and on
# Python 3.
'''Parse (absolute and relative) URLs.

See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.
'''

from collections import namedtuple

__all__ = ['urlparse', 'urlunparse', 'urljoin', 'urldefrag',
           'urlsplit', 'urlunsplit', 'parse_qs', 'parse_qsl']

# A classification of schemes.  The empty string stands for "no scheme
# given", so listing '' in a table makes that behavior the default.
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp']
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp']
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']

# Characters valid in scheme names.
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# urlsplit() memoizes its results; the cache is wiped wholesale once it
# holds MAX_CACHE_SIZE entries.
MAX_CACHE_SIZE = 20
_parse_cache = {}

try:
    _unichr = unichr
except NameError:
    # Python 3 has no unichr(); chr() already yields a text character.
    _unichr = chr


def clear_cache():
    '''Clear the parse cache.'''
    _parse_cache.clear()


class ResultMixin(object):
    '''Shared methods for the parsed result objects.

    Every property derives its value from ``self.netloc`` and evaluates to
    None when the corresponding piece is absent.
    '''

    @property
    def username(self):
        '''User name from the ``user[:password]@`` prefix, or None.'''
        netloc = self.netloc
        if '@' in netloc:
            userinfo = netloc.rsplit('@', 1)[0]
            if ':' in userinfo:
                userinfo = userinfo.split(':', 1)[0]
            return userinfo
        return None

    @property
    def password(self):
        '''Password from the ``user:password@`` prefix, or None.'''
        netloc = self.netloc
        if '@' in netloc:
            userinfo = netloc.rsplit('@', 1)[0]
            if ':' in userinfo:
                return userinfo.split(':', 1)[1]
        return None

    @property
    def hostname(self):
        '''Lower-cased host part of the netloc, or None when it is empty.'''
        netloc = self.netloc
        if '@' in netloc:
            netloc = netloc.rsplit('@', 1)[1]
        if ':' in netloc:
            netloc = netloc.split(':', 1)[0]
        return netloc.lower() or None

    @property
    def port(self):
        '''Port as an int, or None when the netloc carries no ``:port``.

        Raises ValueError (from int()) if the text after ':' is not a
        base-10 number.
        '''
        netloc = self.netloc
        if '@' in netloc:
            netloc = netloc.rsplit('@', 1)[1]
        if ':' in netloc:
            port = netloc.split(':', 1)[1]
            return int(port, 10)
        return None


class SplitResult(namedtuple('SplitResult',
                             'scheme netloc path query fragment'),
                  ResultMixin):
    '''5-tuple produced by urlsplit().'''

    __slots__ = ()

    def geturl(self):
        '''Reassemble the components into a URL string.'''
        return urlunsplit(self)


class ParseResult(namedtuple('ParseResult',
                             'scheme netloc path params query fragment'),
                  ResultMixin):
    '''6-tuple produced by urlparse().'''

    __slots__ = ()

    def geturl(self):
        '''Reassemble the components into a URL string.'''
        return urlunparse(self)


def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    scheme, netloc, url, query, fragment = urlsplit(url, scheme,
                                                    allow_fragments)
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    return ParseResult(scheme, netloc, url, params, query, fragment)


def _splitparams(url):
    '''Split ``path;params`` into (path, params).

    Only a ';' located in the last path segment counts as the params
    separator; if there is none there, params is the empty string.
    '''
    if '/' in url:
        i = url.find(';', url.rfind('/'))
        if i < 0:
            return url, ''
    else:
        i = url.find(';')
    return url[:i], url[i + 1:]


def _splitnetloc(url, start=0):
    '''Return (netloc, rest), where netloc begins at index *start*.

    The netloc ends at the earliest of '/', '?' or '#', or at the end of
    the string when none of them occurs.
    '''
    delim = len(url)
    for c in '/?#':
        wdelim = url.find(c, start)
        if wdelim >= 0:
            delim = min(delim, wdelim)
    return url[start:delim], url[delim:]


def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    allow_fragments = bool(allow_fragments)
    # The argument types are part of the key so that equal-comparing
    # values of different types get separate cache entries.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:
        # Avoid runaway growth.
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http':
            # Fast path for the common case: http uses netloc, query and
            # fragment, so the scheme-table lookups can be skipped.
            scheme = url[:i].lower()
            url = url[i + 1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return v
        for c in url[:i]:
            if c not in scheme_chars:
                # Not a valid scheme: keep the caller's default scheme and
                # leave the text before ':' as part of the URL.
                break
        else:
            scheme, url = url[:i].lower(), url[i + 1:]
    if scheme in uses_netloc and url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, fragment = url.split('#', 1)
    if scheme in uses_query and '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v


def urlunparse(data):
    '''Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent).'''
    scheme, netloc, url, params, query, fragment = data
    if params:
        url = '%s;%s' % (url, params)
    return urlunsplit((scheme, netloc, url, query, fragment))


def urlunsplit(data):
    '''Combine a (scheme, netloc, path, query, fragment) sequence into a
    URL string.  Empty components are omitted together with their
    delimiters.'''
    scheme, netloc, url, query, fragment = data
    # A netloc always gets its '//' prefix; without one, '//' is added only
    # for netloc-using schemes whose path does not already begin with '//'.
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url


def urljoin(base, url, allow_fragments=True):
    '''Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.'''
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
        urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
        urlparse(url, bscheme, allow_fragments)
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            # The reference names its own host: it is already absolute.
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not path:
        # Empty path: inherit the base path, then params and query unless
        # the reference supplies its own (RFC 1808, section 4, step 5).
        path = bpath
        if not params:
            params = bparams
        else:
            # Behavior kept as found in the original.
            # NOTE(review): this strips exactly one character, which only
            # removes the whole last segment when that segment is a single
            # character long -- confirm the intent.
            path = path[:-1]
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    # Drop the last segment of the base path, then append the reference.
    segments = bpath.split('/')[:-1] + path.split('/')
    if segments[-1] == '.':
        # A trailing '.' names the directory itself: keep the final '/'.
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Collapse each '<segment>/..' pair, restarting the scan after every
    # removal, until a full pass finds nothing left to collapse.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                    and segments[i - 1] not in ('', '..')):
                del segments[i - 1:i + 1]
                break
            i = i + 1
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))


def urldefrag(url):
    '''Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    '''
    if '#' in url:
        s, n, p, a, q, frag = urlparse(url)
        defrag = urlunparse((s, n, p, a, q, ''))
        return defrag, frag
    return url, ''


# Lookup table from two-digit hex strings (both lower- and upper-case
# forms) to the character with that code, used by unquote().
_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
_hextochr.update(('%02X' % i, chr(i)) for i in range(256))


def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    res = s.split('%')
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid two-digit hex escape: leave the text untouched.
            res[i] = '%' + item
        except UnicodeDecodeError:
            # Concatenating the looked-up character with the remainder
            # failed to decode; build the character as text instead.
            res[i] = _unichr(int(item[:2], 16)) + item[2:]
    return ''.join(res)


def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    '''Parse a query given as a string argument.

    Arguments:

    qs: URL-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        URL encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns a dictionary mapping each name to the list of its values,
    in the order in which they appear in the query string.
    '''
    result = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        if name in result:
            result[name].append(value)
        else:
            result[name] = [value]
    return result


def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    '''Parse a query given as a string argument.

    Arguments:

    qs: URL-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        URL encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns a list, as G-d intended.
    '''
    # Both '&' and ';' separate fields.
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError('bad query field: %r' % (name_value,))
            # A name with no '=' sign counts as a blank value.
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = unquote(nv[0].replace('+', ' '))
            value = unquote(nv[1].replace('+', ' '))
            r.append((name, value))
    return r


# Input for test().  The first non-blank line is the base URL; every other
# line reads "<relative-url> = <URL:expected-absolute-url>".
test_input = '''
      http://a/b/c/d

      g:h        = <URL:g:h>
      http:g     = <URL:http://a/b/c/g>
      http:      = <URL:http://a/b/c/d>
      g          = <URL:http://a/b/c/g>
      ./g        = <URL:http://a/b/c/g>
      g/         = <URL:http://a/b/c/g/>
      /g         = <URL:http://a/g>
      //g        = <URL:http://g>
      ?y         = <URL:http://a/b/c/d?y>
      g?y        = <URL:http://a/b/c/g?y>
      g?y/./x    = <URL:http://a/b/c/g?y/./x>
      .          = <URL:http://a/b/c/>
      ./         = <URL:http://a/b/c/>
      ..         = <URL:http://a/b/>
      ../        = <URL:http://a/b/>
      ../g       = <URL:http://a/b/g>
      ../..      = <URL:http://a/>
      ../../g    = <URL:http://a/g>
      ../../../g = <URL:http://a/../g>
      ./../g     = <URL:http://a/b/g>
      ./g/.      = <URL:http://a/b/c/g/>
      /./g       = <URL:http://a/./g>
      g/./h      = <URL:http://a/b/c/g/h>
      g/../h     = <URL:http://a/b/c/h>
      http:g     = <URL:http://a/b/c/g>
      http:      = <URL:http://a/b/c/d>
      http:?y         = <URL:http://a/b/c/d?y>
      http:g?y        = <URL:http://a/b/c/g?y>
      http:g?y/./x    = <URL:http://a/b/c/g?y/./x>
'''


def test():
    '''Self-test: join each listed URL against the base and print results.

    Input comes from the file named by the first command-line argument
    ('-' means standard input), or from test_input when no argument is
    given.  The first URL read becomes the base for all that follow.  A
    line of the form "<url> = <expected>" prints an EXPECTED notice when
    the joined result differs.
    '''
    import sys
    base = ''
    opened = None
    if sys.argv[1:]:
        fn = sys.argv[1]
        if fn == '-':
            fp = sys.stdin
        else:
            fp = opened = open(fn)
    else:
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
        fp = StringIO(test_input)
    try:
        for line in fp:
            words = line.split()
            if not words:
                continue
            url = words[0]
            parts = urlparse(url)
            print('%-10s : %s' % (url, parts))
            joined = urljoin(base, url)
            if not base:
                base = joined
            wrapped = '<URL:%s>' % joined
            print('%-10s = %s' % (url, wrapped))
            if len(words) == 3 and words[1] == '=':
                if wrapped != words[2]:
                    print('EXPECTED %s !!!!!!!!!!' % words[2])
    finally:
        # Close only a file this function opened itself, never stdin.
        if opened is not None:
            opened.close()


if __name__ == '__main__':
    test()